import pandas as pd
pd.set_option('display.max_columns', None), pd.set_option('display.max_rows', None)
import missingno as msno
from sklearn.preprocessing import OrdinalEncoder
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing
from sklearn import preprocessing
import plotly.graph_objects as plot
import plotly.graph_objects as plot
# Load the World Bank EdStats indicator data; spaces in column names are
# replaced by underscores so they can be used with attribute-style access.
data = pd.read_csv('EdStatsData.csv')
data.columns = [name.replace(' ', '_') for name in data.columns]
data.head(1)
| Country_Name | Country_Code | Indicator_Name | Indicator_Code | 1970 | 1971 | 1972 | 1973 | 1974 | 1975 | 1976 | 1977 | 1978 | 1979 | 1980 | 1981 | 1982 | 1983 | 1984 | 1985 | 1986 | 1987 | 1988 | 1989 | 1990 | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2020 | 2025 | 2030 | 2035 | 2040 | 2045 | 2050 | 2055 | 2060 | 2065 | 2070 | 2075 | 2080 | 2085 | 2090 | 2095 | 2100 | Unnamed:_69 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Arab World | ARB | Adjusted net enrolment rate, lower secondary, ... | UIS.NERA.2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# Load the country reference table and normalise its column names the same way.
df_cnty = pd.read_csv('EdStatsCountry.csv')
df_cnty.columns = [name.replace(' ', '_') for name in df_cnty.columns]
df_cnty.shape, data.shape
#df_cnty[df_cnty['Country Code']=='ARB'].head()
((241, 32), (886930, 70))
# Merge EdStatsCountry's short names onto the indicator data
# (left join so every indicator row is preserved).
data = data.merge(df_cnty[['Short_Name', 'Country_Code']],
                  how='left', on='Country_Code')
data.head(1)
| Country_Name | Country_Code | Indicator_Name | Indicator_Code | 1970 | 1971 | 1972 | 1973 | 1974 | 1975 | 1976 | 1977 | 1978 | 1979 | 1980 | 1981 | 1982 | 1983 | 1984 | 1985 | 1986 | 1987 | 1988 | 1989 | 1990 | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2020 | 2025 | 2030 | 2035 | 2040 | 2045 | 2050 | 2055 | 2060 | 2065 | 2070 | 2075 | 2080 | 2085 | 2090 | 2095 | 2100 | Unnamed:_69 | Short_Name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Arab World | ARB | Adjusted net enrolment rate, lower secondary, ... | UIS.NERA.2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Arab World |
# Overview of missing values per variable: one row, one count per column.
data.isna().sum().to_frame().T
| Country_Name | Country_Code | Indicator_Name | Indicator_Code | 1970 | 1971 | 1972 | 1973 | 1974 | 1975 | 1976 | 1977 | 1978 | 1979 | 1980 | 1981 | 1982 | 1983 | 1984 | 1985 | 1986 | 1987 | 1988 | 1989 | 1990 | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2020 | 2025 | 2030 | 2035 | 2040 | 2045 | 2050 | 2055 | 2060 | 2065 | 2070 | 2075 | 2080 | 2085 | 2090 | 2095 | 2100 | Unnamed:_69 | Short_Name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 814642 | 851393 | 851311 | 851385 | 851200 | 799624 | 849447 | 849356 | 849354 | 850121 | 797808 | 848153 | 849419 | 848470 | 848324 | 796634 | 847558 | 848289 | 848378 | 849390 | 762525 | 812493 | 811387 | 811137 | 809468 | 755569 | 810123 | 813477 | 802016 | 768091 | 710254 | 763421 | 762725 | 756567 | 758116 | 702822 | 746618 | 749658 | 752543 | 744822 | 644488 | 740918 | 739666 | 749421 | 773141 | 755872 | 870470 | 886787 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 835494 | 886930 | 3665 |
# Split column names by dtype: object columns vs everything else.
categorical_vars = data.select_dtypes(include='object').columns.tolist()
categorical_vars
numerical_vars = data.select_dtypes(exclude='object').columns.tolist()
#numerical_vars
### BAR PLOT OF MISSING VALUES
# Visual overview: one bar per column, height = non-null count.
msno.bar(data)
<AxesSubplot:>
# Per-column missing-value counts as a one-column frame (column named 0),
# with the original column names promoted into an 'index' column.
df = data.isna().sum().to_frame()
df.info()
df = df.reset_index()
df.head(5)
<class 'pandas.core.frame.DataFrame'> Index: 71 entries, Country_Name to Short_Name Data columns (total 1 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 0 71 non-null int64 dtypes: int64(1) memory usage: 3.6+ KB
| index | 0 | |
|---|---|---|
| 0 | Country_Name | 0 |
| 1 | Country_Code | 0 |
| 2 | Indicator_Name | 0 |
| 3 | Indicator_Code | 0 |
| 4 | 1970 | 814642 |
# Fraction of missing values per original column (count / number of rows).
df['%_null'] = df[0] / len(data)
df.head()
| index | 0 | %_null | |
|---|---|---|---|
| 0 | Country_Name | 0 | 0.000000 |
| 1 | Country_Code | 0 | 0.000000 |
| 2 | Indicator_Name | 0 | 0.000000 |
| 3 | Indicator_Code | 0 | 0.000000 |
| 4 | 1970 | 814642 | 0.918496 |
# Keep only the columns whose share of missing values is at most 85%.
df_select = df[df['%_null'] <= 0.85]
col_select = df_select['index']
my_col = list(col_select)
# 2014/2015 sit just above the 85% threshold but are needed for the
# analysis, so re-add them explicitly.
# (Replaces the original `a, b = '2014', '2015',` + paired `.append` calls
# abused inside a throwaway tuple expression.)
my_col.extend(['2014', '2015'])
my_col
['Country_Name', 'Country_Code', 'Indicator_Name', 'Indicator_Code', '2000', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', 'Short_Name', '2014', '2015']
# Restrict to the selected columns, then fix a canonical column order.
data_final = data[my_col]
data_final = data_final[['Country_Code', 'Country_Name',
                         'Indicator_Code', 'Indicator_Name',
                         '2010', '2011', '2012', '2013', '2014', '2015']]
# One row per indicator with its mean value per year across all countries.
# numeric_only=True: explicit since pandas 2.0, which no longer silently
# drops object columns from mean(); matches the old implicit behaviour.
data_ind = data_final.groupby('Indicator_Code').mean(numeric_only=True)
data_ind.reset_index(inplace=True)
data_ind.head()
#data_ind.isna().sum()
| Indicator_Code | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | |
|---|---|---|---|---|---|---|---|
| 0 | BAR.NOED.1519.FE.ZS | 8.616181 | NaN | NaN | NaN | NaN | NaN |
| 1 | BAR.NOED.1519.ZS | 7.811250 | NaN | NaN | NaN | NaN | NaN |
| 2 | BAR.NOED.15UP.FE.ZS | 17.773542 | NaN | NaN | NaN | NaN | NaN |
| 3 | BAR.NOED.15UP.ZS | 14.864514 | NaN | NaN | NaN | NaN | NaN |
| 4 | BAR.NOED.2024.FE.ZS | 11.232986 | NaN | NaN | NaN | NaN | NaN |
# Attach the human-readable indicator name, then rotate the columns so
# Indicator_Name comes first.
data_ind = data_ind.merge(
    data_final[['Indicator_Code', 'Indicator_Name']].drop_duplicates(),
    how='left', on='Indicator_Code')
cols = data_ind.columns.tolist()
data_ind = data_ind[cols[-1:] + cols[:-1]]
data_ind.isna().sum()
#data_ind.head()
Indicator_Name 0 Indicator_Code 0 2010 1097 2011 1726 2012 1634 2013 1803 2014 1693 2015 1608 dtype: int64
# Missing counts of the trimmed table, then a peek at the per-indicator
# means (Indicator_Name now leads).
data_final.isna().sum()
data_ind.head()
| Indicator_Name | Indicator_Code | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | |
|---|---|---|---|---|---|---|---|---|
| 0 | Barro-Lee: Percentage of female population age... | BAR.NOED.1519.FE.ZS | 8.616181 | NaN | NaN | NaN | NaN | NaN |
| 1 | Barro-Lee: Percentage of population age 15-19 ... | BAR.NOED.1519.ZS | 7.811250 | NaN | NaN | NaN | NaN | NaN |
| 2 | Barro-Lee: Percentage of female population age... | BAR.NOED.15UP.FE.ZS | 17.773542 | NaN | NaN | NaN | NaN | NaN |
| 3 | Barro-Lee: Percentage of population age 15+ wi... | BAR.NOED.15UP.ZS | 14.864514 | NaN | NaN | NaN | NaN | NaN |
| 4 | Barro-Lee: Percentage of female population age... | BAR.NOED.2024.FE.ZS | 11.232986 | NaN | NaN | NaN | NaN | NaN |
# Number of missing values per indicator row.
# (Vectorised; replaces the original Python loop that called
# data_ind.iloc[index].isna().sum() once per row.)
data_ind['nb_miss'] = data_ind.isna().sum(axis=1)
data_ind.sort_values('nb_miss', ascending=True, inplace=True)
# First cut: keep only indicators with zero missing values (full
# 2010-2015 coverage).
data_ind = data_ind[data_ind.nb_miss == 0]
data_ind.shape
(1283, 9)
# data_final itself was not modified by the indicator aggregation above,
# so its missing counts are unchanged.
data_final.isna().sum()
Country_Code 0 Country_Name 0 Indicator_Code 0 Indicator_Name 0 2010 644488 2011 740918 2012 739666 2013 749421 2014 773141 2015 755872 dtype: int64
#data_ind.shape
# Locate the internet-usage indicator; the substring 'nterne' matches
# 'Internet' regardless of the capitalisation of the first letter.
data_final[data_final.Indicator_Name.str.contains('nterne')].head(1)
| Country_Code | Country_Name | Indicator_Code | Indicator_Name | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 1375 | ARB | Arab World | IT.NET.USER.P2 | Internet users (per 100 people) | 26.747251 | 29.707761 | 34.014224 | 36.868602 | NaN | NaN |
# The five indicators retained for the analysis (population 15-24, GDP PPP,
# internet users, education spend share, upper-secondary enrolment).
indicator = ['SP.POP.1524.TO.UN', 'NY.GDP.MKTP.PP.CD', 'IT.NET.USER.P2',
             'XGDP.23.FSGOV.FDINSTADM.FFD', 'UIS.E.3.PU']
# .copy(): this subset is mutated below; without it every later column
# assignment raises the SettingWithCopyWarning seen in the original run.
data_in2 = data_final[data_final.Indicator_Code.isin(indicator)].copy()
data_na = pd.DataFrame(data_in2.isna().sum())
data_na.reset_index(inplace=True)
data_in2.reset_index(inplace=True)
data_in2.head(1)
| index | Country_Code | Country_Name | Indicator_Code | Indicator_Name | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1218 | ARB | Arab World | UIS.E.3.PU | Enrolment in upper secondary education, public... | NaN | NaN | NaN | NaN | NaN | NaN |
data_in2.isna().sum()
data_in2.shape
# NOTE(review): data_na[0] holds one missing-count per *column* (11 values),
# yet it is assigned here as a per-row column of data_in2 (1210 rows): only
# the first few row labels align and every other value becomes NaN. Kept
# for output compatibility — confirm whether '%_null' is actually used.
data_in2['%_null'] = data_na[0] / len(data_in2) * 100
# Fill-rate table: fraction of missing values per column.
taux_remp = pd.DataFrame(data_in2.isna().sum(), columns=['%_na'])
(taux_remp / len(data_in2)).T
/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| index | Country_Code | Country_Name | Indicator_Code | Indicator_Name | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | %_null | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| %_na | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.285124 | 0.282645 | 0.306612 | 0.316529 | 0.369421 | 0.490083 | 0.991736 |
# Drop regional/income aggregates so only individual countries remain.
# FIX: the original list was missing a comma between
# 'Latin America & Caribbean (excluding high income)' and
# 'Lower middle income', silently concatenating the two adjacent string
# literals into one bogus entry. Both halves already appear elsewhere in
# the list, so fixing it does not change the filter result.
# NOTE(review): ' Heavily indebted poor countries (HIPC)' and
# ' Africa (excluding high income)' (leading spaces), 'iddle income', and
# the truncated 'Middle East & North Africa (excluding high inc...' look
# like typos — verify against the actual Country_Name values.
aggregates = [
    'Arab World',
    'East Asia & Pacific',
    'East Asia & Pacific (excluding high income)',
    'Euro Area',
    'Euro area',
    'Europe & Central Asia',
    'Europe & Central Asia (excluding high income)',
    'European Union',
    ' Heavily indebted poor countries (HIPC)',
    'Heavily indebted poor countries (HIPC)',
    'High income',
    'Latin America & Caribbean',
    'Latin America & Caribbean (excluding high income)',
    'Least developed countries: UN classification',
    'Low & middle income',
    'Low income',
    'Lower middle income',
    'Middle East & North Africa',
    'Middle East & North Africa (excluding high income)',
    'Middle East & North Africa (excluding high inc...',
    'Middle income',
    'North America',
    'OECD members',
    'South Asia',
    'Sub-Saharan Africa',
    'Upper middle income',
    'West Bank and Gaza',
    'World',
    ' Africa (excluding high income)',
    'iddle income',
]
data_in2 = data_in2[~data_in2.Country_Name.isin(aggregates)]
#msno.bar(data_in2)
data_in2.sort_values('Indicator_Name', inplace=True)
data_in2.head()
| index | Country_Code | Country_Name | Indicator_Code | Indicator_Name | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | %_null | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 110 | 81848 | SSA | Sub-Saharan Africa (excluding high income) | UIS.E.3.PU | Enrolment in upper secondary education, public... | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 910 | 668248 | PRI | Puerto Rico | UIS.E.3.PU | Enrolment in upper secondary education, public... | 104731.0 | 93174.0 | 93344.0 | 92922.0 | NaN | NaN | NaN |
| 410 | 301748 | ECU | Ecuador | UIS.E.3.PU | Enrolment in upper secondary education, public... | 414081.0 | 433288.0 | 464579.0 | 517886.0 | 547145.0 | NaN | NaN |
| 915 | 671913 | QAT | Qatar | UIS.E.3.PU | Enrolment in upper secondary education, public... | 21205.0 | 22350.0 | 23237.0 | 22976.0 | 25291.0 | NaN | NaN |
| 405 | 298083 | DOM | Dominican Republic | UIS.E.3.PU | Enrolment in upper secondary education, public... | 425332.0 | 441712.0 | 455900.0 | 461794.0 | 468161.0 | NaN | NaN |
# Split the table by indicator before imputing so ffill/bfill never
# crosses indicator boundaries, then reassemble in the original order.
# NOTE(review): rows are sorted by Indicator_Name, so within an indicator
# ffill/bfill still propagates one country's value to the next country's
# row — confirm this imputation is intended.
tables = {code: data_in2[data_in2.Indicator_Code == code]
          for code in data_in2.Indicator_Code.unique()}
# Explicit order matches the original tab1..tab5 concatenation.
ordre = ['UIS.E.3.PU', 'NY.GDP.MKTP.PP.CD', 'XGDP.23.FSGOV.FDINSTADM.FFD',
         'IT.NET.USER.P2', 'SP.POP.1524.TO.UN']
data_in2 = pd.concat([tables[code].ffill().bfill() for code in ordre], axis=0)
data_in2.isna().sum()
index 0 Country_Code 0 Country_Name 0 Indicator_Code 0 Indicator_Name 0 2010 0 2011 0 2012 0 2013 0 2014 0 2015 0 %_null 1085 dtype: int64
# NOTE: plain assignment — `test` is an alias of data_in2, not a copy; the
# 'mean' column and the in-place sort below therefore apply to data_in2 too.
test = data_in2
year_cols = ['2010', '2011', '2012', '2013', '2014', '2015']
# Composite score: mean of the six yearly values per row.
test['mean'] = test[year_cols].mean(axis=1)
test.sort_values(['Indicator_Code', 'mean'],
                 ascending=[True, False], inplace=True)
test.reset_index(inplace=True)
import plotly.graph_objects as plot
import plotly
# Enable inline (offline) rendering of plotly figures in the notebook.
plotly.offline.init_notebook_mode()
#import plotly.io as pio
#pio.renderers.default = 'pdf'
# Select the TOP 10 countries per indicator.
# `test` is already sorted by (Indicator_Code asc, mean desc) above, so the
# first 10 rows of each group are exactly the top 10.
# FIX: the original nested loop re-appended a group's top-10 slice once per
# row of that group (duplicating every entry), and used DataFrame.append,
# which was removed in pandas 2.0.
df = test.groupby('Indicator_Code', sort=False).head(10)
# One bar chart per indicator: the top-10 countries by mean value.
# FIX: removed the stray plt.figure(...) call, which only produced the
# empty '<Figure ... with 0 Axes>' matplotlib artefacts (the figures shown
# are plotly, not matplotlib), and two dead local assignments
# (layout_yaxis_visible / layout_yaxis_showticklabels) that had no effect.
for col in df.Indicator_Code.drop_duplicates():
    subset = df[df.Indicator_Code == col]
    fig = plot.Figure(plot.Bar(
        x=subset['Country_Name'],
        y=subset['mean'],
        marker_color='steelblue',
        hovertemplate="%{x} : %{y} <extra></extra>",
        textposition='outside',
        showlegend=False,
    ))
    fig.layout.template = 'plotly_dark'
    fig.update_layout(
        xaxis_title='X Axis Title',
        yaxis_title='Y Axis Title',
        xaxis_tickangle=-30,
        width=1500,
        height=500,
        autosize=False,
        margin=dict(l=50, r=50, b=100, t=100, pad=4),
        title = f'Représentation des TOP 10 Pays Par indicateurs et par Année ({col})'
    )
    fig.show()
<Figure size 1800x288 with 0 Axes>
<Figure size 1800x288 with 0 Axes>
<Figure size 1800x288 with 0 Axes>
<Figure size 1800x288 with 0 Axes>
<Figure size 1800x288 with 0 Axes>
# Non-object (numeric) columns of `test`.
numerical_col_ = [col for col in test.columns if test[col].dtype != 'object']
#numerical_col = pd.DataFrame(numerical_col_).T
#print(f"categorical vars : \n \n {numerical_col[:5].tolist()}")
# Positional slice: drops the two leading index columns and the trailing
# ['%_null', 'mean'], leaving only the year columns. NOTE(review): fragile
# — breaks silently if test's column layout changes; verify against the
# list echoed below.
numerical_col_ = numerical_col_[2:-2]
numerical_col_
['2010', '2011', '2012', '2013', '2014', '2015']
# Min-max scale the year columns to [0, 1] across all rows.
scaler = preprocessing.MinMaxScaler()
scaled = scaler.fit_transform(test[numerical_col_].values)
data_norm = pd.DataFrame(scaled, columns=numerical_col_)
data_norm.head()
# Re-attach the identifier columns (row index matches `test` exactly).
ident_cols = ['Country_Code', 'Country_Name',
              'Indicator_Code', 'Indicator_Name']
data_norm[ident_cols] = test[ident_cols]
data_norm.head(3)
#data_norm.shape
| 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | Country_Code | Country_Name | Indicator_Code | Indicator_Name | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 6.240823e-12 | 6.110332e-12 | 5.955325e-12 | 5.731730e-12 | 5.330616e-12 | 4.891835e-12 | ISL | Iceland | IT.NET.USER.P2 | Internet users (per 100 people) |
| 1 | 6.240823e-12 | 6.024644e-12 | 5.858773e-12 | 5.642774e-12 | 5.229174e-12 | 4.821835e-12 | NOR | Norway | IT.NET.USER.P2 | Internet users (per 100 people) |
| 2 | 6.055717e-12 | 5.801668e-12 | 5.691642e-12 | 5.566714e-12 | 5.140276e-12 | 4.848219e-12 | LUX | Luxembourg | IT.NET.USER.P2 | Internet users (per 100 people) |
data_norm.shape
# Average normalised score per country.
# numeric_only=True: explicit since pandas 2.0, which no longer silently
# drops the object identifier columns from mean().
# (The unused col/col_ reordering lines were dead code and are removed.)
data_norm2 = data_norm.groupby('Country_Name').mean(numeric_only=True)
data_norm2.reset_index(inplace=True)
data_norm2.shape
# NOTE: named 'mean' but computed as the *median* across years — kept
# as-is because downstream code selects on 'mean'.
data_norm2['mean'] = data_norm2[['2010', '2011', '2012',
                                 '2013', '2014', '2015']].median(axis=1)
data_norm2.head(2)
# Descending sort on the composite score, then take the TOP 7 countries.
data_norm2.sort_values('mean', ascending=False, inplace=True)
countries = data_norm2.Country_Name.iloc[0:7]
countries.tolist()
['United States', 'China', 'India', 'Japan', 'American Samoa', 'Germany', 'Russian Federation']
# Filter the 5-indicator table down to the top-scoring countries.
data_in_ctry = data_in2[data_in2.Country_Name.isin(countries)]
data_in_ctry.shape
keep_cols = ['Country_Name', 'Indicator_Code',
             '2010', '2011', '2012', '2013', '2014', '2015']
data_in_ctry = data_in_ctry[keep_cols]
data_in_ctry.head()
#data_in_ctry.shape
# Wide -> long: one row per (country, indicator, year).
data_in_ctry = data_in_ctry.melt(
    id_vars=['Country_Name', 'Indicator_Code'],
    value_vars=keep_cols[2:],
    var_name='Years',
    value_name='indicator_values')
# Composite score per row of the normalised table.
data_norm['mean'] = data_norm[['2010', '2011', '2012',
                               '2013', '2014', '2015']].mean(axis=1)
data_norm.sort_values(data_norm.columns.tolist(),
                      ascending=False, inplace=True)
# NOTE(review): no 'median' column is created anywhere in this script —
# this sort only succeeds if the column was added in an earlier notebook
# session. TODO confirm / create it explicitly.
data_norm.sort_values(['median'], ascending=[False], inplace=True)
# Top 5 rows per country.
# FIX: the original nested loop appended each country's top-5 slice once
# per row of that country (massive duplication) and used DataFrame.append
# (removed in pandas 2.0).
df = data_norm.groupby('Country_Name', sort=False).head(5)
data_norm.head()
data_norm.head()
| 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | Country_Code | Country_Name | Indicator_Code | Indicator_Name | median | mean | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 217 | 1.000000 | 1.000000 | 1.000000 | 0.994251 | 0.948594 | 0.912757 | USA | United States | NY.GDP.MKTP.PP.CD | GDP, PPP (current international $) | 0.975934 | 0.975934 |
| 218 | 0.834313 | 0.899472 | 0.949030 | 1.000000 | 1.000000 | 1.000000 | CHN | China | NY.GDP.MKTP.PP.CD | GDP, PPP (current international $) | 0.947136 | 0.947136 |
| 219 | 0.355004 | 0.372604 | 0.384385 | 0.399875 | 0.400648 | 0.404808 | IND | India | NY.GDP.MKTP.PP.CD | GDP, PPP (current international $) | 0.386221 | 0.386221 |
| 220 | 0.299510 | 0.294703 | 0.293818 | 0.295869 | 0.273402 | 0.260562 | JPN | Japan | NY.GDP.MKTP.PP.CD | GDP, PPP (current international $) | 0.286311 | 0.286311 |
| 221 | 0.299510 | 0.294703 | 0.293818 | 0.295869 | 0.273402 | 0.260562 | ASM | American Samoa | NY.GDP.MKTP.PP.CD | GDP, PPP (current international $) | 0.286311 | 0.286311 |
# FIX: `px` was used before `import plotly.express as px` — a NameError
# when the script runs top-to-bottom; the import now precedes first use.
import plotly.express as px

# Bar chart: best countries by the composite (mean) score.
px.bar(data_norm[data_norm.Country_Name.isin(countries.tolist())], x = 'Country_Name', y = 'mean',
width=1400, height=500, title = 'Représentation des meilleurs pays selon le score crée (moyenne)')
# Line chart: indicator values over the years per country, one animation
# frame per indicator.
px.line(data_in_ctry, x = 'Years', y = 'indicator_values', color = 'Country_Name', animation_frame = 'Indicator_Code',markers = True, line_shape="spline", render_mode="svg",
width=1400, height=500)